#Alexander F. Gazmararian
#afg2@princeton.edu
#January 19, 2024

#Load packages
library(tidyverse)
library(sandwich)
library(lmtest)
library(modelsummary)
library(kableExtra)
library(broom)
library(scales)
library(viridis)
library(here)
library(stm)

#Load custom functions
source(here("code", "fun", "savefig.r"))
source(here("code", "fun", "gofmap.r"))
source(here("code", "fun", "fix_txt.r"))

#Load data
g <- readRDS(here("data", "qualtrics", "natsurvey.rds"))

#Specify model
f.base <- y ~ age + female + black + hispanic + aapi + rural_bin + college + fullemploy + pid3 + ownhome + income5 + buyself_bin + suitable_bin + index_trust + altruism + risk

#Coefficient name mapping for tables
coefmap <- c(
  "polytreatpolycentric"="Polycentric",
  "polytreatpower"="Power Company",
  
  "polytreatpolycentric:risk"="Polycentric x Risk Preferences",
  "polytreatpolycentric:altruism"="Polycentric x Altruism",
  "polytreatpolycentric:index_trust" = "Polycentric x Trust Index",
  "polytreatpolycentric:buyself_bin"="Polycentric x Solar Interest",
  "polytreatpolycentric:pid3Republican"="Polycentric x Republican",
  "polytreatpolycentric:pid3Neither"="Polycentric x Neither",
  
  "polytreatpower:risk"="Power Company x Risk Preferences",
  "polytreatpower:altruism"="Power Company x Altruism",
  "polytreatpower:index_trust" = "Power Company x Trust Index",
  "polytreatpower:buyself_bin"="Power Company x Solar Interest",
  "polytreatpower:pid3Republican"="Power Company x Republican",
  "polytreatpower:pid3Neither"="Power Company x Neither"
)

# Estimate average treatment effects ----
m <- list()
m[["Sign Up"]] <- lm(update(f.base, poly_signup_num ~ polytreat + .), g)
m[["Help Target Community"]] <- lm(update(f.base, poly_help_num ~ polytreat + .), g)
m[["Successful Solar Deployment"]] <- lm(update(f.base, poly_deploy_num ~ polytreat + .), g)
m[["Legitimacy of Change in Target Community"]] <- lm(update(f.base, change_target_num ~ polytreat + .), g)
m[["Legitimacy of Change in Program"]] <- lm(update(f.base, change_donor_num ~ polytreat + .), g)

m.df <- modelplot(m, vcov = "HC2", draw = FALSE)
m.df <- subset(m.df, term %in% c("polytreatpolycentric", "polytreatpower"))

p.ate <-
  m.df %>%
  mutate(
    lb90 = estimate + std.error * qnorm(.05),
    ub90 = estimate + std.error * qnorm(.95),
    term = case_when(
      term == "polytreatpolycentric" ~ "Polycentric",
      term == "polytreatpower" ~ "Power Company"
    )
  ) %>%
  ggplot(aes(x = estimate, y = model, color = term, shape = term)) +
  geom_vline(xintercept = 0, color = "grey") +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0, position = position_dodge(.6)) +
  geom_errorbar(aes(xmin = lb90, xmax = ub90), width = 0, linewidth = 1.25, position = position_dodge(.6)) +
  geom_point(size = 3, fill = "white", position = position_dodge(.6)) +
  scale_shape_manual(values = c(21, 23)) +
  scale_y_discrete(limits = rev, labels = label_wrap(25)) +
  theme_bw(base_size = 13) +
  scale_color_grey() +
  labs(x = "Average Treatment Effect", y = "Outcome", color = "", shape = "") +
  theme(
    panel.grid = element_blank(),
    legend.position = c(.9, .1),
    legend.background = element_blank()
  )
p.ate
savefig(p.ate, "fig_polycentric", height = 3)

# Analyze Open-Ended Responses----

## Sign-Up ----

# Prepare data
signup <- subset(g, !is.na(polyoe_signup)) 
signup.proc <- textProcessor(signup$polyoe_signup, metadata = signup)
signup.out <- prepDocuments(signup.proc$documents, signup.proc$vocab, signup.proc$meta)

# Topic selection
storage <- searchK(
  documents = signup.out$documents,
  vocab = signup.out$vocab,
  init.type = "Spectral",
  prevalence = ~ polytreat + buyself_bin,
  data = signup.out$meta,
  seed = 20230703,
  heldout.seed = 20230703,
  K = seq(5, 25, 1)
)
plot(storage)

#17 topics

# Estimate model
signup.stm <- stm(
  documents = signup.out$documents,
  vocab = signup.out$vocab,
  init.type = "Spectral",
  K = 17,
  prevalence = ~ polytreat + buyself_bin,
  data = signup.out$meta,
  seed = 20230703
)

# Determine what the topics say
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 1, n = 15)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 2, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 3, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 4, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 5, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 6, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 7, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 8, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 9, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 10, n = 10)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 11, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 12, n = 15)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 13, n = 15)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 14, n = 5)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 15, n = 10)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 16, n = 10)
findThoughts(signup.stm, texts = signup.out$meta$polyoe_signup, topics = 17, n = 10)

signup.topics <- c(
  "Program Uncertainty",
  "Benefit Uncertainty",
  "Unaffordability",
  "Solar Opposition",
  "Installation Challenges",
  "Power Company Trust",
  "Financial Incentive",
  "Donation Skepticism",
  "Government Skepticism",
  "Bottom-Up Better",
  "Information Provision",
  "Desire to Help",
  "Environmental Benefits",
  "Housing Constraints",
  "Power Company Incentives",
  "Electric Bill Savings",
  "Solar Inequalities"
)
png(here("output", "figures", "fig_stm_signup.png"), res = 300, width = 6.5, height = 5, units = "in", pointsize = 10)
plot(signup.stm, topic.names = paste0(signup.topics, ":"))
dev.off()

# Estimate treatment effect
signup.effect <- estimateEffect(
  formula = 1:17 ~ polytreat + age + female + black + hispanic + aapi + rural_bin + college + fullemploy + pid3 + ownhome + income5 + buyself_bin + suitable_bin + buyself_bin,
  stmobj = signup.stm,
  meta = signup.out$meta,
  uncertainty = "Global"
  )
# Estimate effect
stm.m <- list()
stm.m[["Govt"]] <- summary(signup.effect, topics = 9) # decrease in government mentions for polycentric treatment
stm.m[["Savings"]] <- summary(signup.effect, topics = 16) # Electric bill savings
stm.m[["Envt"]] <- summary(signup.effect, topics = 13) # More likely to mention with Polycentric treatment 

# Create table output
make_stm_tab <- function(model, stm.processed) {
  model.df <- data.frame(model$tables[[1]])
  model.df <- rownames_to_column(model.df, var = "term")
  names(model.df) <- c("term", "estimate", "std.error", "t", "p.value")
  gl <- data.frame(
    N = nrow(stm.processed$meta)[[1]]
  )
  mod <- list(tidy = model.df, glance = gl)
  class(mod) <- "modelsummary_list"
  return(mod)
}

stm.m.out <- lapply(stm.m, make_stm_tab, stm.processed = signup.proc)

file <- here("output", "tables", "tab_stm_signup.txt")
modelsummary(
  stm.m.out,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  coef_map = c(
    "polytreatpolycentric"="Polycentric",
    "polytreatpower"="Power Company"
  ),
  add_rows = data.frame(
    c("Covariates"),
    c("Yes"),
    c("Yes"),
    c("Yes")
  ),
  output = "latex"
) %>%
  add_header_above(c(" " = 1, "Outcome:" = 3)) %>%
  cat(., file = file)
fix_txt(file)

## Low-income open-ended ----
lowincome <- subset(g, !is.na(polyoe_lowincome)) 
low.proc <- textProcessor(lowincome$polyoe_lowincome, metadata = lowincome)
low.out <- prepDocuments(low.proc$documents, low.proc$vocab, low.proc$meta)

# Topic selection
income.storage <- searchK(
  documents = low.out$documents,
  vocab = low.out$vocab,
  init.type = "Spectral",
  prevalence = ~ polytreat + buyself_bin,
  data = low.out$meta,
  seed = 20230703,
  heldout.seed = 20230703,
  K = seq(5, 25, 1)
)
plot(income.storage)

#18 topics

# Estimate model
income.stm <- stm(
  documents = low.out$documents,
  vocab = low.out$vocab,
  init.type = "Spectral",
  K = 18,
  prevalence = ~ polytreat + buyself_bin,
  data = low.out$meta,
  seed = 20230703
)

plot(income.stm)

summary(income.stm)

# Determine what the topics say
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 1, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 2, n = 10)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 3, n = 10)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 4, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 5, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 6, n = 10)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 7, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 8, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 9, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 10, n = 8)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 11, n = 8)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 12, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 13, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 14, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 15, n = 5)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 16, n = 10)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 17, n = 10)
findThoughts(income.stm, texts = low.out$meta$polyoe_lowincome, topics = 18, n = 15)

signup.topics <- c(
  "Extra Help Needed",
  "Other Barriers",
  "Implementation Concerns",
  "Won't Work",
  "Slow Government",
  "Low Donations",
  "Success",
  "Low Government Trust",
  "No Donations",
  "Donations Not Go to Target",
  "Affordability",
  "New Income Source",
  "Power Company Barriers",
  "Helps People",
  "Savings",
  "Living Standards",
  "Solar Affordability",
  "Geographic Disparities"
)
png(here("output", "figures", "fig_stm_income.png"), res = 300, width = 6.5, height = 5, units = "in", pointsize = 10)
plot(income.stm, topic.names = paste0(signup.topics, ":"))
dev.off()

# Estimate effect
income.effect <- estimateEffect(
  formula = 1:18 ~ polytreat + age + female + black + hispanic + aapi + rural_bin + college + fullemploy + pid3 + ownhome + income5 + buyself_bin + suitable_bin + buyself_bin,
  income.stm,
  meta = low.out$meta,
  uncertainty = "Global"
  )
#Create model output
m.income <- list()
m.income[["Slow"]] <- summary(income.effect, topics = 5) # big effects
m.income[["Trust"]] <- summary(income.effect, topics = 8) # big effects
m.income[["Utility Barriers"]] <- summary(income.effect, topics = 13) # big effects
m.income[["Solar Savings"]] <- summary(income.effect, topics = 15) # effects

stm.income.out <- lapply(m.income, make_stm_tab, stm.processed = low.proc)

file <- here("output", "tables", "tab_stm_income.txt")
modelsummary(
  stm.income.out,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  coef_map = c(
    "polytreatpolycentric"="Polycentric",
    "polytreatpower"="Power Company"
  ),
  add_rows = data.frame(
    c("Covariates"),
    c("Yes"),
    c("Yes"),
    c("Yes"),
    c("Yes")
  ),
  output = "latex"
) %>%
  add_header_above(c(" " = 1, "Government" = 2, " " = 2)) %>%
  cat(., file = file)
fix_txt(file)
